######################################################
#WA 2018 Primary Analysis
# When Women Run, Voters Will Follow (Sometimes): 
##Examining the Mobilizing Effect of Female Candidates in the 2014 and 2018 Midterm Elections 
#By Safarpour, Wyckoff Gaynor, Rouse, and Swers #
######################################################

# Start from an empty workspace: remove every object currently defined in the
# environment this script is run in, so nothing from an earlier session is
# picked up by the analysis below.
rm(list = ls())

#Load required packages.
library(arm)
library(plyr)
library(mvtnorm) 
library(stargazer)
library(stats)
library(Hmisc)
library(dplyr)

# Working directory.
# The path below is specific to the original authors' machine. It is only
# applied when it exists; otherwise the current working directory is kept, so
# the script can be run unchanged from the folder that holds the replication
# data (a bare setwd() would abort the script on any other computer).
replication_dir <- "/Users/ACS/Dropbox/When Women Run/Revision_PoliticalBehavior/R&R Part 2/Publication Docs/Replication Data and Code/"
if (dir.exists(replication_dir)) {
  setwd(replication_dir)
} else {
  message("Replication directory not found; reading data from the current ",
          "working directory: ", getwd())
}

# Read in the WA data. The file is expected in the working directory.
# Character columns are kept as character (not converted to factors).
WA <- read.csv("WA2018PrimaryDataFinal.csv",
               header = TRUE, stringsAsFactors = FALSE)
             
#Main Model.
# Logistic regression of 2018 primary turnout on the woman-candidate
# indicator, registrant gender, generation indicators, 2016 turnout and 2016
# Trump vote, plus the interaction of the woman-candidate indicator with gender
# and with each generation indicator.
#
# The formula is written with all main effects first and the interactions
# (`:`) afterwards. This is the same set of terms, in the same order, that the
# `a + b + a*b` spelling expands to, so the coefficient vector is:
#   (Intercept), womancandidate, female, GenerationZ, Millennials,
#   GenerationX, SilentGeneration, voted2016, trumpvote2016,
#   womancandidate:female, womancandidate:GenerationZ,
#   womancandidate:Millennials, womancandidate:GenerationX,
#   womancandidate:SilentGeneration
# The simulation code further down addresses coefficients by these positions.
mainmodel <- glm(
  voted2018primary ~
    womancandidate + female +
    GenerationZ + Millennials + GenerationX + SilentGeneration +
    voted2016 + trumpvote2016 +
    womancandidate:female +
    womancandidate:GenerationZ +
    womancandidate:Millennials +
    womancandidate:GenerationX +
    womancandidate:SilentGeneration,
  data = WA,
  family = binomial(link = "logit")
)

summary(mainmodel)

##Export model results to table for paper.
# covariate.labels are applied positionally, in the order of the model's
# coefficients (intercept excluded). R places all main effects before the
# interaction terms, and the interactions appear in the order they were
# written in the formula, so the first interaction row is
# womancandidate:female and the second is womancandidate:GenerationZ.
# The two corresponding labels were previously swapped, which attached the
# "Generation Z" interaction label to the gender interaction estimate (and
# vice versa) in the exported table.
stargazer(mainmodel,
          out = "tableWA2018primaryResults.html",
          type = "html",
          covariate.labels = c("Female Candidate",
                               "Female",
                               "Generation Z",
                               "Millennials",
                               "Generation X",
                               "Silent Generation",
                               "Voted in 2016 General Election",
                               "County Trump Vote 2016",
                               "Female Candidate*Female",
                               "Female Candidate*Generation Z",
                               "Female Candidate*Millennial",
                               "Female Candidate*Generation X",
                               "Female Candidate*Silent Generation"),
          dep.var.labels = "Voted in WA 2018 Primary Election",
          column.labels = "",
          notes        = "Results from logistic regression, standard errors in parentheses. Baseline age category: Baby Boomers.",
          notes.append = TRUE, notes.align = "l", digits = 3,
          single.row = FALSE)

# Regression sample: the rows of WA that have no missing value on the outcome
# or on any predictor of the main model. The predicted probabilities below
# are averaged over these rows.
# NOTE(review): this matches the rows glm() used only if the default
# na.action (na.omit) is in effect -- confirm if options("na.action") changed.
sample <- WA[complete.cases(WA[c("voted2018primary", "womancandidate",
                                 "female", "GenerationZ", "Millennials",
                                 "GenerationX", "SilentGeneration",
                                 "voted2016", "trumpvote2016")]), ]
# The full data set is not used again; drop it from the workspace.
rm(WA)

#Predicted Probabilities.

#Overall effect for woman in primary.
options(digits = 4)  # print results with 4 significant digits
n_draws <- 1000      # number of simulated coefficient vectors
set.seed(1714)       # fixed seed so the simulated draws are reproducible

# Point estimates and their covariance matrix from the main model.
# (These objects share their names with the extractor functions; calls such
# as coef(mainmodel) still resolve to the functions.)
vcov <- vcov(mainmodel)
coef <- coef(mainmodel)

# Draw n_draws coefficient vectors from a multivariate normal centred on the
# estimates; each row of sim_coefs is one simulated coefficient vector.
sim_coefs <- rmvnorm(n = n_draws, mean = coef, sigma = vcov)

# Sanity check: the column means of the draws should be close to the estimates.
rbind(coef(mainmodel), apply(sim_coefs, MARGIN = 2, FUN = mean))

# ---------------------------------------------------------------------------
# Simulation-based average predicted probabilities and woman-candidate effects
# ---------------------------------------------------------------------------
# The same 14-term linear predictor was previously written out sixteen times
# (overall, five generations, two genders; each with the woman-candidate
# indicator at 0 and at 1). It is now evaluated by one helper, so the
# coefficient positions are stated in exactly one place. The arithmetic is
# performed in the same order as before, so the simulated values are unchanged.

# Average predicted probability of voting, one value per simulated draw.
#
# Arguments:
#   data            data frame providing the columns female, GenerationZ,
#                   Millennials, GenerationX, SilentGeneration, voted2016 and
#                   trumpvote2016; predictions are averaged over its rows.
#   woman_candidate single number (0 or 1 in this script) that replaces the
#                   womancandidate variable for every row of `data`.
#   draws           matrix of simulated coefficients, one draw per row;
#                   defaults to the global `sim_coefs`.
#
# Returns: numeric vector of length nrow(draws).
#
# Coefficients are addressed by POSITION. The positions used here assume the
# coefficient order of `mainmodel` (main effects first, then interactions):
#    1 (Intercept)          8 voted2016
#    2 womancandidate       9 trumpvote2016
#    3 female              10 womancandidate:female
#    4 GenerationZ         11 womancandidate:GenerationZ
#    5 Millennials         12 womancandidate:Millennials
#    6 GenerationX         13 womancandidate:GenerationX
#    7 SilentGeneration    14 womancandidate:SilentGeneration
simulate_turnout <- function(data, woman_candidate, draws = sim_coefs) {
  # Fail loudly instead of silently mis-aligning coefficients if the model
  # does not have exactly the 14 coefficients listed above.
  stopifnot(length(woman_candidate) == 1L, ncol(draws) == 14L)
  vapply(seq_len(nrow(draws)), function(i) {
    b <- unname(draws[i, ])
    mean(invlogit(b[1] + b[2] * woman_candidate +
                    b[3] * data$female +
                    b[4] * data$GenerationZ +
                    b[5] * data$Millennials +
                    b[6] * data$GenerationX +
                    b[7] * data$SilentGeneration +
                    b[8] * data$voted2016 +
                    b[9] * data$trumpvote2016 +
                    b[10] * data$female * woman_candidate +
                    b[11] * data$GenerationZ * woman_candidate +
                    b[12] * data$Millennials * woman_candidate +
                    b[13] * data$GenerationX * woman_candidate +
                    b[14] * data$SilentGeneration * woman_candidate))
  }, numeric(1))
}

# Simulated woman-candidate effect for the rows of `data`: for every draw, the
# average predicted probability with womancandidate = 1 minus the average
# predicted probability with womancandidate = 0.
simulate_effect <- function(data, draws = sim_coefs) {
  simulate_turnout(data, 1, draws) - simulate_turnout(data, 0, draws)
}

# Overall: whole regression sample, no woman candidate vs. woman candidate.
ppwomen0.s <- simulate_turnout(sample, woman_candidate = 0)
mean(ppwomen0.s)

ppwomen1.s <- simulate_turnout(sample, woman_candidate = 1)
mean(ppwomen1.s)

effect.01 <- ppwomen1.s - ppwomen0.s
mean(effect.01) # (0-1 women candidate effect)
quantile(effect.01, c(.025, .975)) #CIs.

rm(ppwomen1.s, ppwomen0.s) #drop what is no longer needed.

##Get Predictions by Generation.
# Each effect is computed on the rows of the regression sample that belong to
# the generation, using that generation's indicator column.
effect.SG <- simulate_effect(subset(sample, SilentGeneration == 1))
quantile(effect.SG, c(.025, .975))
mean(effect.SG)

effect.BB <- simulate_effect(subset(sample, BabyBoomers == 1))
quantile(effect.BB, c(.025, .975))
mean(effect.BB)

effect.GX <- simulate_effect(subset(sample, GenerationX == 1))
quantile(effect.GX, c(.025, .975))
mean(effect.GX)

effect.M <- simulate_effect(subset(sample, Millennials == 1))
quantile(effect.M, c(.025, .975))
mean(effect.M)

effect.GZ <- simulate_effect(subset(sample, GenerationZ == 1))
quantile(effect.GZ, c(.025, .975))
mean(effect.GZ)

#Effect by gender subgroup.
effect.Men <- simulate_effect(subset(sample, female == 0))
quantile(effect.Men, c(.025, .975)) #CIs.
mean(effect.Men) #mean effect.

effect.Women <- simulate_effect(subset(sample, female == 1))
quantile(effect.Women, c(.025, .975)) #CIs.
mean(effect.Women)

#Store the Effects.
# Simulated effect draws, in the row order of the output table:
# Overall, Women, Men, Gen. Z, Millennials, Gen. X, Baby Boomers, Silent Gen.
effect_draws <- list(effect.01, effect.Women, effect.Men, effect.GZ,
                     effect.M, effect.GX, effect.BB, effect.SG)

Election <- rep("2018 Primary", times = 8)
# Point estimate: mean of the draws; interval: 2.5th and 97.5th percentiles.
Effect <- unlist(lapply(effect_draws, mean))
LowerCI <- unlist(lapply(effect_draws, quantile, probs = .025))
UpperCI <- unlist(lapply(effect_draws, quantile, probs = .975))

WAprimary <- data.frame(Effect, UpperCI, LowerCI, Election)

# Ordered factor so the subgroups keep the order given here.
subgroup_labels <- c("Overall", "Women", "Men", "Gen. Z", "Millennials",
                     "Gen. X", "Baby\nBoomers", "Silent Gen.")
WAprimary$`Registrant Subgroup` <- factor(subgroup_labels,
                                          levels = subgroup_labels,
                                          ordered = TRUE)

#write effects to data frame.
write.csv(WAprimary, "WA2018PrimaryEffects.csv", row.names = FALSE)

